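# Top-level category (exact class match): //h3[@class="classify_c_h3"]/a/text()
# Sub-category (partial class match via contains()): //div[contains(@class,"classify_list")]/span/a/text()
# Sub-category (exact class match): //div[@class='classify_list']/span/a/text()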
import lxml.etree as le

# Read the saved page up front; the file handle is only needed for the read,
# so it is released before any parsing or printing happens.
with open('edu.htm', 'r', encoding='utf-8') as f:
    html = f.read()

# Parse the HTML text into an lxml element tree that supports XPath queries.
html_x = le.HTML(html)

# Every div with class "classify_cList" is treated as one category block:
# an <h3><a> heading plus <div><span><a> entries underneath it.
div_x_s = html_x.xpath('//div[@class="classify_cList"]')

# One dict per category block: {'catagory': <heading text>, 'course': [<entry text>, ...]}.
# The key spelling is kept as-is because it is part of the data shape.
data_s = []
for div_x in div_x_s:
    # Heading text of this block (first matching text node).
    heading_texts = div_x.xpath('./h3/a/text()')
    if not heading_texts:
        # A block without a heading link would otherwise raise IndexError
        # on the [0] lookup and abort the whole run; skip it instead.
        continue

    data_s.append(
        {
            'catagory': heading_texts[0],
            # All entry link texts under this block (may be an empty list).
            'course': div_x.xpath('./div/span/a/text()'),
        }
    )

# Print each category followed by its entries, one per line, tab-indented.
for entry in data_s:
    print(entry['catagory'])
    for course in entry['course']:
        print('\t', course)